/**
 * 
 */
package edu.umd.clip.lm.tools;

import java.io.*;
import java.nio.channels.*;

import edu.berkeley.nlp.util.*;
import edu.umd.clip.lm.model.data.*;
/**
 * Command-line tool that prints summary statistics for a training data file.
 * <p>
 * For every block in the file it reports the record and event counts the
 * block itself declares ({@code size()} and {@code getTotalCount()}) next to
 * the counts obtained by actually iterating over the block's contents, and
 * finishes with the same comparison summed over all blocks.
 *
 * @author Denis Filimonov <den@cs.umd.edu>
 */
public class TrainingDataInfo {

	/** Command-line options understood by this tool. */
	public static class Options {
		/** Path of the training data file to inspect. */
		@Option(name = "-input", required = true, usage = "Training data file")
		public String input;
	}

	/**
	 * Reads the training data file named by {@code -input} block by block and
	 * prints the declared versus the actually observed record and event counts.
	 *
	 * @param args command-line arguments, parsed into {@link Options}
	 * @throws IOException if the input file cannot be opened, read or closed
	 */
	public static void main(String[] args) throws IOException {
		OptionParser optParser = new OptionParser(Options.class);
		Options opts = (Options) optParser.parse(args, true);

		// Keep a handle on the stream so it can be released even when reading fails;
		// closing the stream also closes the channel obtained from it.
		FileInputStream stream = new FileInputStream(opts.input);
		try {
			ReadableByteChannel channel = stream.getChannel();
			TrainingDataReader reader = new OnDiskTrainingDataReader(channel);
			ReadableTrainingData inputData = new ReadableTrainingData(reader);

			// Totals as declared by the blocks themselves.
			long totalCount = 0;
			long totalRecords = 0;
			long totalBlocks = 0;

			// Totals obtained by walking every record and every future.
			long actualTotalCount = 0;
			long actualTotalRecords = 0;

			while (inputData.hasNext()) {
				TrainingDataBlock block = inputData.next();

				long actualRecords = 0;
				long actualCount = 0;

				for (ContextFuturesPair pair : block) {
					++actualRecords;

					for (TupleCountPair tc : pair.getFutures()) {
						// Only checked when the JVM runs with assertions enabled (-ea).
						assert (tc.count > 0);
						actualCount += tc.count;
					}
				}

				System.out.printf("block #%02d: %d records (actually %d), %d events (actually %d)\n",
						totalBlocks, block.size(), actualRecords, block.getTotalCount(), actualCount);

				actualTotalCount += actualCount;
				actualTotalRecords += actualRecords;

				totalBlocks += 1;
				totalRecords += block.size();
				totalCount += block.getTotalCount();
			}

			System.out.printf("%d blocks, %d records (actually %d), %d events (actually %d)\n",
					totalBlocks, totalRecords, actualTotalRecords, totalCount, actualTotalCount);
		} finally {
			stream.close();
		}
	}

}
